
# A4a_price_index_agg_entry
#==============================================================================

# Description: This code computes the difference in the gains from trade between
# poor and rich on average on aggregate in the entry-only model

rm(list = ls())
options(warn=-1)

library('data.table')
library('stargazer')
library('ggplot2')
library("ggrepel")  

setwd("D:/data_replication")


# Import and format list of products for each country
#===============================================================================

product_id <- fread("estimation/2_product_list/product_id_declarant.csv")

pc8plus_i_k <- fread("estimation/4_demand_estimation/pc8plus_i_k.csv")
names(pc8plus_i_k) <- c("product_id", "V1", "k")
pc8plus_i_k <- pc8plus_i_k[ , .(product_id, k)]
pc8plus_i_k[ , product_id := as.integer(product_id)]

k_to_i_k <- fread("estimation/5_supply_side/k_to_i_k.csv")

product_id <- merge(product_id, pc8plus_i_k, by = "product_id", all = T)
product_id <- product_id[k != 0]
product_id <- product_id[ , .(declarant, k)]

product_id <- merge(product_id, k_to_i_k, by = "k", all = T)
product_id <- product_id[ , .(i_k, declarant)]
setkey(product_id, declarant, i_k)

save(file = "estimation/5_supply_side/product_id.RData", product_id)
rm(list = ls())


# Import entry-only dataset
#===============================================================================

load('estimation/5_supply_side/data_PI_entry.RData')

data[ , u_beta_true_20 := log(P_true_sorted_20) - 1]                                     
data[ , u_beta_true_80 := log(P_true_sorted_80) - 1]                                    
data[ , u_beta_counter_20 := log(P_counter_sorted_20) - 1]
data[ , u_beta_counter_80 := log(P_counter_sorted_80) - 1]

data[ , diff := P_counter_sorted_20 / P_true_sorted_20 - P_counter_sorted_80 / P_true_sorted_80]

data <- data[abs(diff) < 10]       
data <- data[beta_price_20 < 1]       
data <- data[beta_price_80 < 1]      
data = data[(abs(u_beta_counter_80) < 1e-13 | abs(u_beta_counter_20) < 1e-13 | abs(u_beta_true_80) < 1e-13 | abs(u_beta_true_20) < 1e-13) == F]

data_entry = data
rm(data)


# Import full-adjustment dataset
#===============================================================================

load('estimation/5_supply_side/data_PI_full_adj.RData')

data[ , u_beta_true_20 := log(P_true_sorted_20) - 1]                                     
data[ , u_beta_true_80 := log(P_true_sorted_80) - 1]                                    
data[ , u_beta_counter_20 := log(P_counter_sorted_20) - 1]
data[ , u_beta_counter_80 := log(P_counter_sorted_80) - 1]

data[ , diff := P_counter_sorted_20 / P_true_sorted_20 - P_counter_sorted_80 / P_true_sorted_80]

data <- data[index_mc == F]                                                     # Remove cases in which demand is not sufficiently elastic 
data <- data[abs(diff) < 1] 
data <- data[beta_price_20 < 1]       
data <- data[beta_price_80 < 1]    
data = data[(abs(u_beta_counter_80) < 1e-13 | abs(u_beta_counter_20) < 1e-13 | abs(u_beta_true_80) < 1e-13 | abs(u_beta_true_20) < 1e-13) == F]

data_full = data
rm(data)


# Merge and keep common product categories
#===============================================================================

index_both = merge(data_full, data_entry, by = c("product", "Year", "Quarter", "Declarant"))
index_both <- index_both[ , .(product, Year, Quarter, Declarant)]

data = data_entry
data = merge(data, index_both, by = c("product", "Year", "Quarter", "Declarant"))

#===============================================================================


# Construct Price Index
#===============================================================================

# Remove extreme observations
#-------------------------------------------------------------------------------

data_PI = data
data_PI = data_PI[diff < 1 & diff > -1]
data_PI = data_PI[(is.na(u_beta_counter_80) | is.na(u_beta_counter_20) | is.na(u_beta_true_80) | is.na(u_beta_true_20)) == F]
data_PI = data_PI[(abs(u_beta_counter_80) < 1e-13 | abs(u_beta_counter_20) < 1e-13 | abs(u_beta_true_80) < 1e-13 | abs(u_beta_true_20) < 1e-13) == F]
data_PI = data_PI[index_mc == F]                                                # Remove cases in which demand is not sufficiently elastic 

data_PI[ , v_true_20 := beta_price_20 * log(P_true_sorted_20 / (exp(1) * gamma(1 - (1/beta_price_20))))]
data_PI[ , v_counter_20 := beta_price_20 * log(P_counter_sorted_20 / (exp(1) * gamma(1 - (1/beta_price_20))))]
data_PI[ , v_true_80 := beta_price_80 * log(P_true_sorted_80 / (exp(1) * gamma(1 - (1/beta_price_80))))]
data_PI[ , v_counter_80 := beta_price_80 * log(P_counter_sorted_80 / (exp(1) * gamma(1 - (1/beta_price_80))))]

data_PI <- data_PI[ , obs := .N, by = c('Year', 'Quarter', 'Declarant')]

# Merge in Cobb-Douglas Weights
#-------------------------------------------------------------------------------

w = read.csv("estimation/4_demand_estimation/4_cobb_douglas_weights/weights_i_k.csv")
colnames(w)[1:4] = c("product", "Year", "Quarter", "Declarant")
data_PI = merge(data_PI, w, by = c('product', 'Year', 'Quarter', 'Declarant'))

load("estimation/5_supply_side/product_id.RData")
colnames(product_id) = c("product", "Declarant")
product_id[ , idx_PI := 1]
data_PI = merge(data_PI, product_id, by = c('product', 'Declarant'), all = T)
data_PI <- data_PI[is.na(Year) == F]
data_PI <- data_PI[idx_PI == 1]

data_PI = data_PI[s_20_i_k != 0]
data_PI = data_PI[s_80_i_k != 0]

data_PI[ , s_20_sum := sum(s_20_i_k), by = c('Year', 'Quarter', 'Declarant')]
data_PI[ , s_80_sum := sum(s_80_i_k), by = c('Year', 'Quarter', 'Declarant')]
data_PI[ , s_20_i_k := s_20_i_k / s_20_sum]
data_PI[ , s_80_i_k := s_80_i_k / s_80_sum]

# Compute Price Index
#-------------------------------------------------------------------------------

data_PI <- data_PI[ , PI_1 := exp(1 - sum(s_20_i_k*log(s_20_i_k))), by = c('Year', 'Quarter', 'Declarant')]
data_PI <- data_PI[ , PI_2 := prod(exp((s_20_i_k/beta_price_20)*v_true_20)), by = c('Year', 'Quarter', 'Declarant')]
data_PI <- data_PI[ , PI_3 := prod(gamma(1 - s_20_i_k/beta_price_20)), by = c('Year', 'Quarter', 'Declarant')]
data_PI <- data_PI[ , PI_true_20 := PI_1*PI_2*PI_3, by = c('Year', 'Quarter', 'Declarant')]

data_PI <- data_PI[ , PI_1 := exp(1 - sum(s_80_i_k*log(s_80_i_k))), by = c('Year', 'Quarter', 'Declarant')]
data_PI <- data_PI[ , PI_2 := prod(exp((s_80_i_k/beta_price_80)*v_true_80)), by = c('Year', 'Quarter', 'Declarant')]
data_PI <- data_PI[ , PI_3 := prod(gamma(1 - s_80_i_k/beta_price_80)), by = c('Year', 'Quarter', 'Declarant')]
data_PI <- data_PI[ , PI_true_80 := PI_1*PI_2*PI_3, by = c('Year', 'Quarter', 'Declarant')]

data_PI <- data_PI[ , PI_1 := exp(1 - sum(s_20_i_k*log(s_20_i_k))), by = c('Year', 'Quarter', 'Declarant')]
data_PI <- data_PI[ , PI_2 := prod(exp((s_20_i_k/beta_price_20)*v_counter_20)), by = c('Year', 'Quarter', 'Declarant')]
data_PI <- data_PI[ , PI_3 := prod(gamma(1 - s_20_i_k/beta_price_20)), by = c('Year', 'Quarter', 'Declarant')]
data_PI <- data_PI[ , PI_counter_20 := PI_1*PI_2*PI_3, by = c('Year', 'Quarter', 'Declarant')]

data_PI <- data_PI[ , PI_1 := exp(1 - sum(s_80_i_k*log(s_80_i_k))), by = c('Year', 'Quarter', 'Declarant')]
data_PI <- data_PI[ , PI_2 := prod(exp((s_80_i_k/beta_price_80)*v_counter_80)), by = c('Year', 'Quarter', 'Declarant')]
data_PI <- data_PI[ , PI_3 := prod(gamma(1 - s_80_i_k/beta_price_80)), by = c('Year', 'Quarter', 'Declarant')]
data_PI <- data_PI[ , PI_counter_80 := PI_1*PI_2*PI_3, by = c('Year', 'Quarter', 'Declarant')]

data_PI <- data_PI[ , diff := PI_counter_20 / PI_true_20 - PI_counter_80 / PI_true_80]
data_PI <- data_PI[ , gft_20 := PI_counter_20 / PI_true_20]
data_PI <- data_PI[ , gft_80 := PI_counter_80 / PI_true_80]
data_PI <- data_PI[ , head(.SD, 1), by = c('Year', 'Quarter', 'Declarant')]

#===============================================================================


# Run Regression, save, and plot
#===============================================================================

Reg_P_summary <- lm(diff ~ 0 + as.factor(Declarant), data = data_PI)
reg_gft_20 <- lm(gft_20 ~ 0 + as.factor(Declarant), data = data_PI)
reg_gft_80 <- lm(gft_80 ~ 0 + as.factor(Declarant), data = data_PI)

# Format Data to Save Regression Output

declarant <- as.matrix(Reg_P_summary$coefficients)
declarant <- gsub("as.factor\\(Declarant\\)","",rownames(declarant))
declarant <- as.numeric(declarant)
income_data <- fread(file = "estimation/5_supply_side/gdp_per_capita_2007.csv")

# Save Regression

output <- merge(income_data, data.table(declarant, Reg_P_summary$coefficients), by = "declarant")
colnames(output) <- c(colnames(output)[1:4], "FE_declarant")
write.csv(file = "estimation/5_supply_side/price_index/results/diff_PI_entry.csv", output)

output_gft_20 <- merge(income_data, data.table(declarant, reg_gft_20$coefficients), by = "declarant")
colnames(output_gft_20) <- c(colnames(output)[1:4], "gft_20")
output_gft_80 <- merge(income_data, data.table(declarant, reg_gft_80$coefficients), by = "declarant")
colnames(output_gft_80) <- c(colnames(output)[1:4], "gft_80")
output_gft_80$year = NULL
output_gft_80$country = NULL
output_gft_80$gdp_per_capita_declarant = NULL
output_gft <- merge(output_gft_20, output_gft_80, by = "declarant")
write.csv(file = "robustness/overall_PI/gft_entry_agg.csv", output_gft)

# Plot
#-------------------------------------------------------------------------------

rm(list = ls())

output <- fread('estimation/5_supply_side/price_index/results/diff_PI_entry.csv')
iso3 <- fread('estimation/5_supply_side/ISO3_declarant_codes.csv')

output[ , gdp_log := log(gdp_per_capita_declarant)]
setkey(output, declarant)
setkey(iso3, declarant)

output <- output[iso3]
output <- output[is.na(country) == F]
output = output[declarant != 18] 
output = output[declarant != 46] 
output = output[declarant != 600] 

reg <- lm(FE_declarant ~ gdp_log, data = output)
summary(reg)
cor(output$FE_declarant, output$gdp_log)

plot_diff_entry <- ggplot(output, aes(x=gdp_log, y=FE_declarant)) +
  geom_text_repel(label=output$ISO3, size = 5) +
  theme(text = element_text(size=17)) +
  geom_smooth(method=lm) +
  labs(x = "GDP per capita (in logs)", y = expression(paste(Delta, "Welfare"["Poor"]," - ", Delta, "Welfare"["Rich"]))) 

ggsave("estimation/5_supply_side/price_index/results/plot_PI_entry.png", plot = plot_diff_entry)


